# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html


import json

from bs4 import BeautifulSoup
# useful for handling different item types with a single interface
from itemadapter import ItemAdapter

class ScrapyMovie099Pipeline:
    """Strip HTML from each item's ``plain_text`` field and dump items to disk.

    Every processed item is appended to ``movie.json`` as one JSON object
    per line. The file is opened when the spider starts and closed when it
    finishes.
    """

    def __init__(self):
        # No file is open until open_spider() runs; close_spider() relies on
        # this to be safe even if opening never happened.
        self.fp = None

    def open_spider(self, spider):
        """Open ``movie.json`` for writing, truncating any existing content."""
        self.fp = open('movie.json', 'w', encoding='utf-8')

    def process_item(self, item, spider):
        """Replace the item's HTML ``plain_text`` with its text and write it out.

        Args:
            item: The scraped item; its ``plain_text`` field is expected to
                hold an HTML fragment.
            spider: The spider that produced the item (unused).

        Returns:
            The same item, with ``plain_text`` reduced to plain text.
        """
        adapter = ItemAdapter(item)

        # An unset or None field cannot be parsed; leave such items untouched
        # instead of failing inside BeautifulSoup.
        html_content = adapter.get('plain_text')
        if html_content is not None:
            soup = BeautifulSoup(html_content, 'html.parser')
            adapter['plain_text'] = soup.get_text()

        # Emit real JSON (str(item) would produce a Python repr), and keep
        # non-ASCII characters readable since the file is UTF-8 encoded.
        line = json.dumps(adapter.asdict(), ensure_ascii=False)
        self.fp.write(line + '\n')
        return item

    def close_spider(self, spider):
        """Close the output file if it was opened."""
        if self.fp is not None:
            self.fp.close()
            self.fp = None
        # NOTE: the original author intended to run the aa.py script once the
        # crawl has finished; nothing here does that yet.

